/*
This program is largely file-clean-up and variable organization.
However, it implements the following important data transformations:
	1) Consolidates the refix location-based files with the main files, generating complete times series from 1984-2017.
		-See bhreadme.docx and do files 01b, and 04a for more info on the refix files
	2) Drops bank-level-data, delivering 3 location-based datasets
	3) A variable is created that flags locations that were ENTIRELY made up of 034 form filers, and thus have no renpl bank health measures pre-1985q2.  
		-This variable is named, e.g., all_L05s_034filers_inMSA
	4) Sets some variables equal to 0 where missing for validation purposes, e.g., the countof* variables.
	5) Lastly, this file organizes/orders the variables by 'entity-type' (e.g., L05ent Weighted Averages, with L05ent percentile-based measures, 
	   L05ent counts, etc.)
*/


local out2020_dir <place your filepaths here>
cd "`out2020_dir'"

*sets up file naming structure and variable organization
local locTypes 		"MSA nMSA ST"

local ren_rats 		"renR  	renaR2"
local npl_rats 		"nplR  	npaR2"
local ratios 		"`ren_rats' `npl_rats'"
local ratios_re 	"`ren_rats'"

local re_commentLabel "[034 filers [rcon9804==54] removed]"


foreach locType of local locTypes {  //This loop outputs 3, location-based datasets: MSA, nMSA, ST


	if "`locType'"=="MSA" {

		local refixFileName "bh-04d-`locType'-Locations_refix-wavgsAndPercentiles"
		local mainFileName  "bh-04a-`locType'-Locations-wavgsAndPercentiles"
		
		local byLocVar "msabr" 		//sets by group at MSA-level (msabr) for MSA datasets 

	}

	if "`locType'"=="nMSA" {

		local refixFileName "bh-04e-`locType'-Locations_refix-wavgsAndPercentiles"
		local mainFileName  "bh-04b-`locType'-Locations-wavgsAndPercentiles"
		
		local byLocVar "stalpbr" 	//sets by group at state (stalpbr) for nMSA and ST datasets
	}

	if "`locType'"=="ST" {

		local refixFileName "bh-04f-`locType'-Locations_refix-wavgsAndPercentiles"
		local mainFileName  "bh-04c-`locType'-Locations-wavgsAndPercentiles"
		
		local byLocVar "stalpbr" 	//sets by group at state (stalpbr) for nMSA and ST datasets
		
	}

	
	/*This step:
	1) Reads in _refix data files (pre-1985q2)
	2) Clean/organize variables (appends suffix *_re to relevant variables, which is relevant to step #4 in this list)
	3) Merges with 'main' BH data
	4) Overwrites renpl measures for the relevant locations/dates
	*/
	
	use `refixFileName',clear
	

	local ents 		"ent L05ent L05bhc M05ent M05bhc L10ent L10bhc M10ent M10bhc"
	local pcntls 10 25 50

	*------- ORGANIZE VARIABLES BY ENTITY-TYPE --------------
	*e.g., All variables relating to L05ents go together, all relating to M05 go together, etc.
	
	local refix_vars "" 			//populate list of variables related to the refix process for renaming, ordering, etc
	
	foreach ent of local ents {	
		foreach r of local ratios_re {
			foreach p of local pcntls { 	
				local refix_vars 	"`refix_vars' `r'_p`p'_by_`ent'_2_`locType'" 		//Organize Percentile Variables
			}
				local refix_vars 	"`refix_vars' `ent'_2_`locType'_x_`r'_WAVG"		//Organize Weighted Average Variables, e.g., L05ent_2_MSA_x_renR_WAVG
		}
				local refix_vars 	"`refix_vars'  countof_`ent's_in_`locType'"		//Organize Entity Count Variables
	}
	
	local bhc_stndln_vars "countof_bhcs_in_`locType'"							//Tag BHC-id Count Variable - there is no 'regular' bhc level aggregation - keeping track of the number (count) of top holder ids in a location





	*-------BUILD VARIABLE LIST FOR LOCATION-BASED DEPOSIT SHARES--------------

	local locShr_vars "" //populate list of 'LocShr' variables for renaming, ordering, etc:
		**Note: These variables capture the share of a location's total deposits classified as belonging to Local entities vs. Multi-Locational entities 
	

	
	
	local LMents "L05ent L05bhc M05ent M05bhc L10ent L10bhc M10ent M10bhc" 		//list that excludes the overall 'ent' measures (non local/multi measures) 
	foreach LMent of local LMents {
		local locShr_vars "`locShr_vars' LocShr`LMent'_2_`locType'" 			//...Local vs. Multi Deposit shares in each location - n/a for 'regular' banks
	}

	
	
	***GATHER ALL VARIABLES FOR REFIX DATES/LOCATIONS***********
	local refix_vars 	 "`refix_vars' `locShr_vars' `bhc_stndln_vars'"


	local mrg_vars "dateq `byLocVar' ent" //check that we can merge without - rcon9804 renR renaR2

		
	keep `mrg_vars' `refix_vars'
	
	*Append '_re' to varname (_refix is too long for Stata in some cases. After variable-name-cleanup below, we rename these again with 
	*the suffix _refix that is referenced throughout this documentation. )
	
	renvars `refix_vars',postf(_re)   //Note: renvars is an old stata package name for 'rename variables' - - not to be confused with anything related to real estate npls 
									
									
	duplicates tag `mrg_vars',gen(tag)
	assert tag==0
	drop tag

	
	
		*Refix data merged back into main (full time series) data:
		*>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
		merge 1:1 `mrg_vars' using `mainFileName'  
		*>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>

	assert _merge!=1
	assert rcon9804==54 if _merge==2 & dateq<=tq(1985q1)  //the 034 form filers (rcon9804==54) that were removed in do file *-03a-*
	assert dateq<tq(1985q2) if _merge==3 																						


	
	************OVERWRITE RENPL MEASURES WITH REFIX DATA IF LOCATION HAS 034 FILERS PRIOR TO 1985q2
	
	*Note: If a location had no 034 form filers prior to 1985q2, this step overwrites the relevant measures with identical values.
		local ents 	"ent L05ent L05bhc M05ent M05bhc L10ent L10bhc M10ent M10bhc"
		local pcntls 10 25 50

		foreach r of local ratios_re {
		
			foreach e of local ents {
			
				foreach p of local pcntls {
																
					replace  `r'_p`p'_by_`e'_2_`locType' 	= `r'_p`p'_by_`e'_2_`locType'_re if  _merge==3  
					drop `r'_p`p'_by_`e'_2_`locType'_re 	

					
				}

				replace `e'_2_`locType'_x_`r'_WAVG  		= `e'_2_`locType'_x_`r'_WAVG_re 	if  _merge==3 	
				drop `e'_2_`locType'_x_`r'_WAVG_re
				
			}
		}


		duplicates drop dateq `byLocVar',force //unique date-location observations now

		
		
	
		//Bank-Level variables no longer needed - Kept them up to this point for bank-level validation checks.
		#delimit ;
			drop _merge *_flag *_TotDep  *cusum* total* 
			 ent off_num bhc rcon9804 nplR npaR2 renR renaR2 	
			ent_2_`locType' `locType'_2_ent bhc_2_`locType' `locType'_2_bhc 
			;
		#delimit cr
		
	
	


	*=============RENAME BH MEASURES===========
	
		local wavgs "*_WAVG"
		unab wavgs:`wavgs'	
		
		foreach x of local wavgs {
			renvars `x',subst(_2_`locType'_x "")  //shortens variable names for weighted avgs
		}
			
		drop *_x_* //drops the bank-level cumulative sum vars

		local ents 	"ent L05ent M05ent L10ent M10ent L05bhc M05bhc L10bhc M10bhc"

		foreach ratio of local ratios {
			foreach ent of local ents {
				foreach pcnt in 10 25 50 	{
					rename `ratio'_p`pcnt'_by_`ent'_2_`locType'	 `ent'_`ratio'_p`pcnt' 	//shortens variable names for percentiles
				}
			}
		}
		

	
	*=============ORGANIZE "REGULAR" BANK HEALTH MEASURES , i.e., no Local/Multi designation
	
		local order_ent_1 "countof_ents_in_`locType' countof_ents_in_`locType'_re"
		local order_bhc_1 "countof_bhcs_in_`locType' countof_bhcs_in_`locType'_re"

		foreach ratio of local  ratios {

			local order_ent_2 "`order_ent_2' ent_`ratio'_WAVG  ent_`ratio'_p10 ent_`ratio'_p25 ent_`ratio'_p50"
			
		}
		
	
	
	*=============ORDER "LOCAL/MULTI ENTITY" BH MEASURES 
		local order_LMs_1 ""
		local LMents 	"L05ent M05ent L10ent M10ent L05bhc M05bhc L10bhc M10bhc"
		foreach LMent of local LMents {
		
			local order_LMs_1 "`order_LMs_1' countof_`LMent's_in_`locType' countof_`LMent's_in_`locType'_re LocShr`LMent'_2_`locType' LocShr`LMent'_2_`locType'_re"
				
				foreach ratio of local  ratios {
				
					local order_LMs_1 "`order_LMs_1' `LMent'_`ratio'_WAVG `LMent'_`ratio'_p10 `LMent'_`ratio'_p25 `LMent'_`ratio'_p50"		
					
				}
		}

	
	
	*Finalize which vars to keep and order them
	keep 	dateq `byLocVar' `order_ent_1' `order_bhc_1' `order_ent_2' `order_LMs_1' 
	order 	dateq `byLocVar' `order_ent_1' `order_bhc_1' `order_ent_2' `order_LMs_1' 

	
	*===========================================================================================
	*SOME FINAL CLEANING / LABELING / RENAMING
	
	renvars, subst(_in_`locType' "")
	
	*FOR CLARITY - ORGANIZING REGULAR ENTITIES AND LOCAL/MULTI ENTITIES SEPARATELY
	
	
	
	
	*REGULAR ENTITIES
	********************************************************************************
			foreach entOrBhc in ent bhc {

				if "`entOrBhc'"=="ent"	local entLabel "Banks"	
				if "`entOrBhc'"=="bhc"	local entLabel "BHCs(BHC-ids)"	

			/*this step replaces missings with zeroes and makes refix count equal to main full time series count AFTER 1985Q1.
			This is optional, but again, makes validation easier to loop through.
			*/
				replace countof_`entOrBhc's_re=countof_`entOrBhc's 	if dateq>=tq(1985q2) & countof_`entOrBhc's_re==.  	
				replace countof_`entOrBhc's_re=0 					if dateq<tq(1985q2)  & countof_`entOrBhc's_re==.
				replace countof_`entOrBhc's=0 						if countof_`entOrBhc's==.

				
				*Flag locations that were made up of ENTIRELY 034 filers [rcon9804==54], and thus 'disappear' from the renpl cohort before 1985q2
				*NPL measures are present for those locations and years, so we simply flag them.
				gen all_`entOrBhc's_034filers_in`locType'=1 		if countof_`entOrBhc's_re==0 & dateq<tq(1985q2) & countof_`entOrBhc's!=0
					la var countof_`entOrBhc's 						"Count `entLabel' in `locType'"
					la var countof_`entOrBhc's_re 					"Count `entLabel' in `locType's-`re_commentLabel'"
					la var all_`entOrBhc's_034filers_in`locType' 	"All `entLabel' in `locType' file 034 form, [rcon9804==54]. No Real Estate measures for `locType'"
			}
			
			
			
			foreach ratio of local  ratios {
			
				if "`ratio'"=="nplR"	local ratioLabel "AvgNonPerfLnRatio"
				if "`ratio'"=="npaR2"	local ratioLabel "AvgNonPerfAssetRatio"	
				if "`ratio'"=="renR"	local ratioLabel "AvgRealEstate-NonPerfLnRatio"
				if "`ratio'"=="renaR2" 	local ratioLabel "AvgRealEstate-NonPerfAssetRatio"
		
				la var ent_`ratio'_WAVG 			"`ratioLabel' for `locType' weighted by shares of `locType's total deposits"	
				
				local pcnts "10 25 50"	
				foreach pcnt of local pcnts {
				
					la var ent_`ratio'_p`pcnt' 		"`ratioLabel' for `locType' - represents measure of entity @ `pcnt'th percentile"
					
				}
			}
			
						
			********************************************************************************
		    *Making RENPL measures explicitly equal to missing for locations that disappear from sample due to having ONLY 034 form filers
			
			foreach ratio of local  ratios_re {
			
			
				assert inlist(ent_`ratio'_WAVG,0,.) if  all_ents_034filers_in`locType'==1
				replace ent_`ratio'_WAVG=. 			if  all_ents_034filers_in`locType'==1

				local pcnts "10 25 50"	
				foreach pcnt of local pcnts {
					assert inlist(ent_`ratio'_p`pcnt',0,.) 	if  all_ents_034filers_in`locType'==1
					replace ent_`ratio'_p`pcnt'=. 			if  all_ents_034filers_in`locType'==1
					
				}
			}
	
	
	********************************************************************************
	***LOCAL/MULTI BANKS
	********************************************************************************
		local 	LMents 	"L05ent M05ent L10ent M10ent L05bhc M05bhc L10bhc M10bhc"
		foreach LMent of local LMents {
		
			local locMult=substr("`LMent'",1,3)
			local entOrBhc=substr("`LMent'",4,3)

			if "`entOrBhc'"=="ent"	local entLabel "Banks"	
			if "`entOrBhc'"=="bhc"	local entLabel "BHCs(BHC-ids)"	

			if "`locMult'"=="L05"	local locMultLabel "defined as Local[5%]"
			if "`locMult'"=="M05"	local locMultLabel "defined as Multi[5%]"
			if "`locMult'"=="L10"	local locMultLabel "defined as Local[10%]"
			if "`locMult'"=="M10"	local locMultLabel "defined as Multi[10%]"


				
		
			replace countof_`LMent's_re=countof_`LMent's 	if dateq>=tq(1985q2) & countof_`LMent's_re==.
			replace countof_`LMent's_re=0 					if dateq<tq(1985q2)  & countof_`LMent's_re==.
			replace countof_`LMent's=0 						if countof_`LMent's==.
		

			*gen flag for locations where ALL Local | Multi entities are 034 form filers:
			
			gen  all_`LMent's_034filers_in`locType'=1 if 	countof_`LMent's_re==0 & dateq<tq(1985q2) & countof_`LMent's!=0
			
				la var countof_`LMent's 			"Total `entLabel' `locMultLabel' in `locType'"
				la var countof_`LMent's_re 		"Total `entLabel' `locMultLabel' in `locType' `re_commentLabel'"
				la var all_`LMent's_034filers_in`locType' "All `entLabel' in `locType' file 034[rcon9804==54].No RealEstate measures for `locType'"
			
			
			replace LocShr`LMent'_2_`locType'=0       							if LocShr`LMent'_2_`locType'==.
			replace LocShr`LMent'_2_`locType'_re = LocShr`LMent'_2_`locType' 	if dateq>=tq(1985q2)
			
			replace LocShr`LMent'_2_`locType'_re=0 							if LocShr`LMent'_2_`locType'_re==.
			
			
				la var LocShr`LMent'_2_`locType'       "Total `entLabel' `locMultLabel' deposits  in `locType' / total `locType' deposits"
				la var LocShr`LMent'_2_`locType'_re 	"Total `entLabel' `locMultLabel' deposits  in `locType' / total `locType' deposits `re_commentLabel'"


			foreach ratio of local  ratios {
			
				if "`ratio'"=="nplR"	local ratioLabel "AvgNonPerfLnRatio"
				if "`ratio'"=="npaR2"	local ratioLabel "AvgNonPerfAssetRatio"	
				if "`ratio'"=="renR"	local ratioLabel "AvgRealEstate-NonPerfLnRatio"
				if "`ratio'"=="renaR2" 	local ratioLabel "AvgRealEstate-NonPerfAssetRatio"
			
				la var `LMent'_`ratio'_WAVG 		"`ratioLabel' for `locType' weighted by shares of `locType's `LMent' total deposits"
				 
				local pcnts "10 25 50"	
				foreach pcnt of local pcnts {	
				
					la var `LMent'_`ratio'_p`pcnt' "`ratioLabel' for `locType' - represents measure of `LMent' @ `pcnt'th percentile"

									
				} 		
			} 			
			
					
		********************************************************************************
		*Making RENPL measures explicitly equal to missing for locations that disappear from sample due to having ONLY 034 form filers
		foreach ratio of local  ratios_re {
		
			assert  inlist(`LMent'_`ratio'_WAVG,0,.) 	if  all_`LMent's_034filers_in`locType'==1
			replace `LMent'_`ratio'_WAVG=.  			if  all_`LMent's_034filers_in`locType'==1

			local pcnts "10 25 50"	
			foreach pcnt of local pcnts {
				assert inlist(`LMent'_`ratio'_p`pcnt',0,.) 	if  all_`LMent's_034filers_in`locType'==1
				replace `LMent'_`ratio'_p`pcnt'=. 			if  all_`LMent's_034filers_in`locType'==1
			}
			
		}
		********************************************************************************		
		
	} 			 //end LMents loop		


	
	******FINAL VARIABLE RENAMING
	
	

	renvars, subst(renaR2_ 	ren_asset_) 
	renvars, subst(npaR2_ 	npl_asset_)

	local 	ents 	"ent L05ent M05ent L10ent M10ent L05bhc M05bhc L10bhc M10bhc"
	foreach ent of local ents {
		
		la var `ent'_ren_asset_WAVG 			"Avg (renpls/assts) 4 `ent' wghtd by `ent's shr of `locType'"		
		la var `ent'_npl_asset_WAVG 			"Avg (npls/assts) 4 `ent' wghtd by `ent's shr of `locType'"

		local pcnts "10 25 50"	
		foreach pcnt of local pcnts {
				la var `ent'_ren_asset_p`pcnt' 		"(renpls/assts) of `ent' @ `pcnt'th pcntl"	
				la var `ent'_npl_asset_p`pcnt' 		"(npls/assts) of `ent' @ `pcnt'th pcntl"	
				
		}
	}
	
		rename *_re *_refix  //back to '_refix' now that variables names have been shortened


			*######################
			save bh-05a-allBhMeasuresBy`locType',replace
			*######################

} 




